Final Infographic

Author

Luna Herschenfeld-Catalán she/her

Published

March 9, 2024

Data

Wrangle and Clean Data

https://www.census.gov/cgi-bin/geo/shapefiles/index.php

# geometry of the puma 2022 shapefiles
puma <- read_sf(here("data", "tl_2022_06_puma20", "tl_2022_06_puma20.shp"))

puma_la <- puma %>% 
  filter(grepl(037, PUMACE20)) %>% 
  rename(PUMA = PUMACE20)

pums_housing_22 <- read_csv(here::here("data", "pums_2022.csv"))

# https://www.huduser.gov/portal/datasets/il/il2020/2020summary.odn
people <- c(1, 2, 3, 4, 5, 6, 7, 8)
very_low <- c(39450, 45050, 50700, 56300, 60850, 65350, 69850, 74350)
extreme_low <- c(23700, 27050, 30450, 33800, 36550, 39250, 41950, 44650)
low <- c(63100, 72100, 81100, 90100, 97350, 104550, 111750, 118950)

income_breaks <- data.frame(people, extreme_low, very_low, low)

# remove the duplicates from 2022 data
puma_house_codes <- pums_housing_22 %>% 
  select(SERIALNO, PUMA) %>% 
  distinct()

puma_22 <- pums_housing_22 %>% 
  inner_join(puma_la, by = "PUMA") %>% 
  select(SERIALNO, WGTP, NP, HINCP, FINCP, GRPIP) %>% 
  mutate(NP = as.numeric(NP),
         HINCP = as.numeric(HINCP),
         FINCP = as.numeric(FINCP),
         GRPIP = as.numeric(GRPIP)) %>% 
  group_by(SERIALNO) %>% 
  summarize_all(mean, na.rm = TRUE) %>% 
  left_join(puma_house_codes)

la_tract_vacant <- get_acs(
  state = "CA",
  county = "Los Angeles",
  geography = "tract",
  variables = "B25004_001",
  geometry = TRUE,
  year = 2022
)

la_tract <- la_tract_vacant %>%
  mutate(NAME = gsub("; Los Angeles County; California", # elements that you want to remove
                                  "", # replace with blank
                                  NAME)) %>%
  mutate(NAME = gsub("Census Tract ", # elements that you want to remove
                                  "", # replace with blank
                                  NAME)) %>% 
  filter(GEOID != "06037599100") %>% # islands
  filter(GEOID != "06037599000") %>% # islands
  filter(GEOID != "06037980003") %>%
  filter(GEOID != "06037980004") %>%
  filter(!(NAME >= 9000 & NAME <= 9800))

Looking at the places where affordable housing projects started

# read in affordable housing projects in LA from 2003 to present
ah_raw <- read_csv(here("data/LAHD_Affordable_Housing_Projects_List__2003_to_Present__20240119.csv")) %>% 
  clean_names()

# clean affordable housing data 
ah_clean <- ah_raw %>% 
  mutate(fun_date = as.Date(date_funded, tryFormats = c("%m/%d/%Y"))) %>% # make date_funded as date
  mutate(year = lubridate::year(fun_date)) %>% # make into year column 
  select(name, year, fun_date, construction_type, site_community, total_units = project_total_units, 
         housing_type, lahd_funded, in_service_date, gps_coords_on_map) %>% 
  mutate(gps_coords_on_map = gsub("[POINT()]", # elements that you want to remove
                                  "", # replace with blank
                                  gps_coords_on_map)) %>% # remove these elements from gps column 
  separate_wider_delim(gps_coords_on_map, 
                       delim = " ", names = c("empty", "coords"), # separate space from before the coordinates
                       too_many = "merge") %>% 
  separate_wider_delim(coords, delim = " ", names = c("Longitude", "Latitude"), # split lat and long coords
                       too_many = "merge") %>% 
  select(-empty) %>% 
  st_as_sf(coords = c("Longitude", "Latitude"), # make into geometry object
                 crs = st_crs(la_tract)) %>% 
  st_join(la_tract) %>% 
  mutate(site_community = str_to_title(site_community))

Assigning income categories to households

Create income breaks for households above 8 (from 9-20)

# very low income 
size <- seq(9, 20, 1)
very_low <- data.frame()
row_n <- data.frame()

for (i in seq_along(size)) {
  income <- 1.32 + (i*.08)
  limit <- income*56300
  row <- size[i]
  row_n <- rbind(row_n, row)
  very_low = rbind(very_low, limit)
}

very_low <- cbind(row_n, very_low) %>% 
  rename(people = X9,
         very_low = X78820)

extreme <- data.frame()
for (i in seq_along(size)) {
  # https://www.huduser.gov/portal/datasets/il/il2020/2020ILCalc3080.odn#calculator
  income <- 44120 + (i*4480)
  extreme <- rbind(extreme, income)
}

low <- data.frame()
for (i in seq_along(size)) {
  income <- 1.32 + (i*.08)
  limit <- income*90100
  low <- rbind(low, limit)
}

cols_9_20 <- cbind(extreme, very_low) %>% 
  rename(extreme_low = X48600) %>% 
  cbind(low) %>% 
  rename(low = X126140)

income_breaks <- rbind(income_breaks, cols_9_20)

Creating For Loop

Applying for loop to the whole data set

puma_22$status <- rep(NA, nrow(puma_22))

for (person in 1:nrow(puma_22)) {
  
  individual <- puma_22$NP[person]
  
  income <- income_breaks %>% 
    filter(people == individual)
  
  #print(individual)
  #print(individual)
  # Define thresholds for labels
  extreme_threshold <- income[[2]]
  very_low_threshold <- income[[3]]
  low_threshold <- income[[4]]
  
  # # Assign labels based on thresholds
  puma_22$status[person] <- ifelse(puma_22[["HINCP"]][person] <= extreme_threshold, 
                              "extreme",
                              ifelse(puma_22[["HINCP"]][person] > extreme_threshold & puma_22[["HINCP"]][person] <= very_low_threshold, 
                                     "very low",
                                     ifelse(puma_22[["HINCP"]][person] > very_low_threshold & puma_22[["HINCP"]][person] <= low_threshold, 
                                            "low",
                                            ifelse(puma_22[["HINCP"]][person] > low_threshold, "not eligible", "non"))))
}

Mapping census tracts in LA

puma_geo <- puma_la %>% 
  select(PUMA, geometry)


puma_plot <- puma_22 %>% 
  group_by(PUMA) %>% 
  count(status) %>% 
  pivot_wider(names_from = "status", values_from = "n") %>% 
  ungroup() %>% 
  clean_names() %>% 
  select(puma, extreme, very_low, low, not_eligible) %>% 
  mutate(PUMA = puma, 
         total_households = rowSums(.[2:5]),
         total_eligible = rowSums(.[2:4]),
         percent_eligible = (total_eligible/total_households) * 100 ) %>% 
  left_join(puma_geo) %>% 
  st_as_sf()

# crop puma plot with the la_tract to remove the top part of LA
puma_plot_crop <- st_crop(puma_plot, la_tract)


# has 2196 points
tract_neighborhood <- read_csv(here("data/Census_Tract_Locations__LA__20240206.csv")) %>% 
  mutate(census_tract = gsub(", Los Angeles County, California", # elements that you want to remove
                                  "", # replace with blank
                                  Tract)) %>% 
  select(-Tract, census_tract, Neighborhood, Latitude, Longitude) %>% 
  st_as_sf(coords = c("Longitude", "Latitude"), # make into geometry object
                 crs = st_crs(puma_plot)) %>% 
  st_crop(puma_plot_crop) # neighborhoods only in the census tracts of interest

# make a df with the puma information and census tract geometry 
puma_tract <- puma_plot_crop %>% # has 67 observations
  st_join(tract_neighborhood,
                join = st_intersects) %>% # has the same number of points as the neighborhood
  clean_names() %>% 
  select(neighborhood, census_tract, location, puma, geometry) %>% 
  mutate(census_tract = gsub("Census Tract ", # elements that you want to remove
                                  "", # replace with blank
                                  census_tract))


# rename the tract column
la_tract <- la_tract %>% 
  rename(census_tract = NAME) %>% 
  select(census_tract, geometry)

puma_tract <- puma_tract %>% 
  st_drop_geometry() %>% 
  left_join(la_tract)

puma_eligibility <- puma_plot %>% 
  st_drop_geometry() %>% 
  select(!PUMA)

# make map of the census tracts with the puma household data
tract_eligibility <- puma_tract %>% 
  left_join(puma_eligibility, by = "puma") %>% 
  filter(!st_is_empty(geometry)) %>% 
  st_as_sf()

Looking at census tract and the percent of income spent on rent: Rent Burdened

rent_burden <- puma_22 %>% 
  group_by(PUMA) %>% 
  rename(percent_rent = GRPIP) %>% 
  select(WGTP, percent_rent, PUMA) %>% 
  mutate(status = ifelse(percent_rent >= 30, "burden", "not_burden")) %>% 
  count(status) %>% 
  pivot_wider(names_from = "status", values_from = "n") %>% 
  ungroup() %>% 
  rename(puma = PUMA) %>% 
  mutate(total = burden+not_burden,
         percent_burden = (burden/total)*100)
  
#plot(rent_burden_2$geometry)

rent_burdened <- puma_tract %>% 
  left_join(rent_burden) %>% 
  filter(!st_is_empty(geometry)) %>% 
  st_as_sf()

Creating a custom theme

library(monochromeR)
library(showtext)
library(ggtext)
library(ggrepel)

# import google fonts 
font_add_google(name = "Josefin Sans",
                family = "josefin") # name we provide ggplot

font_add_google(name = "Sen",
                family = "sen") # name we provide ggplot

font_add_google(name = "Tenor Sans",
                family = "tenorSans") # name we provide ggplot

# Pairs
font_add_google(name = "Playfair Display",
                family = "playfairDisplay") # name we provide ggplot

font_add_google(name = "Alice",
                family = "alice") # name we provide ggplot

# enable show text here that configures font across platforms
showtext_auto()

my_theme <- theme(
  plot.title = element_text(size = 20, face = "bold", family = "playfairDisplay", hjust = 0.5), # Font size set to 16 and bold.
  plot.caption.position = "plot",
  plot.caption = element_text(size = 10, hjust = 0, family = "alice"),
  axis.title = element_text(size = 14), # Font size set to 12.
  axis.text = element_blank(), # no axis text.
  axis.ticks = element_blank(),
  legend.title = element_text(size = 15, face = "bold", family = "alice"), # Font size of the title of the legend set to 12 and bold.
  legend.text = element_text(size = 10), # font size of the text in the legend
  panel.background = element_rect(fill = "white"), # color of the panel background
  panel.grid.major = element_blank(),
  plot.margin = margin(12, 6, 12, 18),
  panel.grid.minor = element_blank(), # invisible auxiliary grids
  plot.background = element_rect(fill = "white"), # plot's background
  strip.background = element_rect(fill = "white", color = "#ff975d"), # strip background
  strip.text = element_text(size = 12, face = "bold", family = "alice"), # strip texts
  legend.position = "top", # position of the legend
  legend.box.background = element_rect(color = "white"), # background of the plot
  legend.key.size = unit(1, "cm") # size of the legends key
)

Figures

my_brew_palette15 <- RColorBrewer::brewer.pal(n = 20, name = 'YlGnBu')
my_brew_palette10 <- RColorBrewer::brewer.pal(n = 10, name = 'RdBu')
my_brew_palette5 <- RColorBrewer::brewer.pal(n = 10, name = 'Reds')

# eligibility of housing in LA
tract_eligibility_percent <- ggplot(tract_eligibility) +
  geom_sf(aes(fill = percent_eligible)) +
  scale_fill_stepsn(colors = my_brew_palette5,
                    labels = scales::label_percent(scale = 1),
                    breaks = scales::breaks_width(width = 7)) +
  labs(title = "Exploring Affordable Housing Needs in Los Angeles County, CA",
       fill = "Percent Eligible",
       caption = "Households Eligible for Affordable Housing by PUMA in Los Angeles, CA") + 
  guides(fill = guide_colorbar(barheight = unit(1, units = "mm"),  
                                 barwidth = unit(100, units = "mm"),
                                 direction = "horizontal",
                                 ticks.colour = "grey20",
                                 title.position = "top",
                                 label.position = "bottom",
                                 title.hjust = 0.5)) +
  my_theme 




rent_burden_plot <- ggplot(rent_burdened) +
  geom_blank() +
  geom_sf(aes(fill = percent_burden)) +
  scale_fill_stepsn(colors = my_brew_palette5,
                    breaks = scales::breaks_width(width = 10),
                    labels = scales::label_percent(scale = 1)) +
  labs(title = "Exploring Affordable Housing Needs in Los Angeles County, CA",
       fill = "Percent Burdened",
       caption = "Percent of Households Rent Burdened by Census Tract in Los Angeles, CA (2022). Dots represent location of affordable housing projects from 2003-2020.") +
  guides(fill = guide_colorbar(barheight = unit(1, units = "mm"),  
                                 barwidth = unit(100, units = "mm"),
                                 direction = "horizontal",
                                 ticks.colour = "grey20",
                                 title.position = "top",
                                 label.position = "bottom",
                                 title.hjust = 0.5)) +
  my_theme


burden_affordable_projects <- rent_burden_plot +
  geom_sf(data = ah_clean, alpha = 0.5, size = 1)

tract_eligibility_percent

burden_affordable_projects

Selecting the Census Tracts of interest

# joining the eligibility and rent data together by census tract
burden_eligible <- rent_burdened %>% 
  st_drop_geometry() %>% 
  left_join(tract_eligibility, by = "census_tract") %>% 
  st_as_sf()

top_burden <- burden_eligible %>% 
  slice_max(order_by = percent_burden,
            n = 75)

# high burden, no affordable housing
burden_non <- top_burden %>% 
  slice_min(order_by = percent_burden, 
            n = 25) 

# high burden, affordable housing
burden_ah <- burden_eligible %>% 
  slice_max(order_by = percent_burden,
            n = 30)

plot(burden_ah$geometry)

# low burden, with affordable housing - and also have high percentage eligible for affordable housing
lowburden_ah <- burden_eligible %>% 
  slice_max(order_by = percent_eligible,
            n = 450) %>% 
  slice_min(order_by = percent_eligible,
            n = 25)

plot(lowburden_ah$geometry)

# low burden, no affordable housing - low percentage of households that qualify for affordable housing
lowburden_non <- burden_eligible %>% 
  slice_min(order_by = percent_eligible,
            n = 100) %>% 
  slice_min(order_by = percent_eligible,
            n = 20)

plot(lowburden_non$geometry)

# neighborhoods right next to undeserved geometry represented by `burden_non`
interest <- burden_eligible %>% 
  slice_min(order_by = percent_burden,
            n = 600) %>% 
  slice_max(order_by = percent_burden,
            n = 15) 

plot(interest$geometry)

# %>% 
#   st_union()

Plotting the three comparison neighborhoods

# high burden, no affordable housing
burden_non_plot <- burden_non %>% 
  st_union() %>% 
  ggplot() +
  geom_sf() +
  labs(title = "Neighborhoods with High Rent Burden, \nNo Affordable Housing") +
  my_theme

# high burden, affordable housing
burden_ah_plot <- burden_ah %>% 
  st_union() %>% 
  ggplot() +
  geom_sf() +
  labs(title = "Neighborhoods with High Rent Burden, \nWith Affordale Housing") +
  my_theme

# low burden, with affordable housing - and also have high percentage eligible for affordable housing
lowburden_ah_plot <- lowburden_ah %>% 
  st_union() %>% 
  ggplot() +
  geom_sf() +
  labs(title = "Neighborhoods with Low Rent Burden, \nWith Affordale Housing",
       subtitle = "and also have high percentage eligible for affordable housing") +
  my_theme

# ow percentage of households that qualify for affordable housing
lowburden_non_plot <- lowburden_non %>% 
  st_union() %>% 
  ggplot() +
  geom_sf() +
  labs(title = "Neighborhoods with Low Rent Burden, \nNo Affordale Housing",
       subtitle = "low percentage of households that qualify for affordable housing") +
  my_theme

interest_plot <- interest %>% 
  st_union() %>% 
  ggplot() +
  geom_sf() +
  labs(title = "Neighborhood with low rent burden right next \nto neighborhood with high rent burden and no support",
       subtitle = "50% percentage of households that qualify for affordable housing, less than 20% rent burden") +
  my_theme

burden_non_plot / burden_ah_plot / lowburden_ah_plot / lowburden_non_plot / 
interest_plot

base_map <- ggplot(rent_burdened) +
  geom_blank() +
  geom_sf(aes(fill = percent_burden), color = "black", lwd = 0.05) +
  scale_fill_stepsn(colors = my_brew_palette5,
                    breaks = scales::breaks_width(width = 10),
                    labels = scales::label_percent(scale = 1)) +
  labs(title = "Exploring Affordable Housing Needs in Los Angeles County, CA",
       fill = "Percent Burdened") +
  guides(fill = guide_colorbar(barheight = unit(1, units = "mm"),  
                                 barwidth = unit(100, units = "mm"),
                                 direction = "horizontal",
                                 ticks.colour = "grey20",
                                 title.position = "top",
                                 label.position = "bottom",
                                 title.hjust = 0.5)) +
  my_theme +
  geom_sf(data = ah_clean, alpha = 0.5, size = 1)

# burden_non points
burden_non_pt <- data.frame(
  y = c(33.9884335),
  x = c(-118.1758795),
  yend = c(33.77222),
  xend = c(-117.92694)
)

# burden_ah points
burden_ah_pt <- data.frame(
  y = c(34.0597395),
  x = c(-118.2982),
    yend = c(33.93291),
  xend = c(-118.66136)
  # yend = c(33.62815),
  # xend = c(- 118.30718)
)

lowburden_ah_pt <- data.frame(
  y = c(34.245882),
  x = c(-118.4209225),
  yend = c(34.31389),
  xend = c(-118.12637)
)

lowburden_non_pt <- data.frame(
  y = c(33.860177),
  x = c(-118.388831),
  yend = c(33.82235),
  xend = c(-118.53424)
)
 
# add the lines to the plot in segments - still editing this 
base_map +
  geom_curve(data = burden_non_pt,
             aes(x = x, y = y, xend = xend, yend = yend),
             arrow = arrow(length = unit(0.01, "npc")),
             #linetype = "dashed",
             curvature = -0.5,
             color = "grey") +
  geom_curve(data = burden_ah_pt,
             aes(x = x, y = y, xend = xend, yend = yend),
             arrow = arrow(length = unit(0.01, "npc")),
             curvature = 0.5,
             color = "grey") +
  geom_curve(data = lowburden_ah_pt,
               aes(x = x, y = y, xend = xend, yend = yend),
              arrow = arrow(length = unit(0.01, "npc")),
             curvature = -0.5,
             color = "grey")

  # geom_segment(data = lowburden_non_pt,
  #              aes(x = x, y = y, xend = xend, yend = yend),
  #              linetype = "dashed",
  #              color = "grey")
RColorBrewer::brewer.pal(n=10,"Reds")
[1] "#FFF5F0" "#FEE0D2" "#FCBBA1" "#FC9272" "#FB6A4A" "#EF3B2C" "#CB181D"
[8] "#A50F15" "#67000D"
order_2015 <- data.frame(
  y = c(200000000),
  x = c(2010),
  yend = c(50000000),
  xend = c(2015)
)

# looking at the total construction of affordable housing in each year 
lahd_funding <- ah_clean %>% 
  drop_na() %>% 
  group_by(year) %>% 
  summarize(total_funding = sum(lahd_funded)) %>% 
  ggplot(aes(x = year, y = total_funding)) +
  geom_col(fill = "#67000D") +
  scale_y_continuous(expand=c(0,0),
                     labels = function(x) paste0(x / 1e6, "M")) +
  geom_curve(data = order_2015,
               aes(x = x, y = y, xend = xend, yend = yend),
              arrow = arrow(length = unit(0.01, "npc")),
             size = 0.1,
             curvature = -0.5,
             color = "black") +
  annotate(geom = "text", x = 2009, y = 200000000, 
           label = "2015 Directive Order \nfrom Mayor XYZ to facilitate the building \nof affordable housing.") +
  theme_classic() +
  labs(x = 'Year',
       y = 'Total Funding (in millions)')

lahd_funding

Next steps

  • get information for these areas
  • Number of households (approximate)
  • How has that changed since 2003

Look at the industry / jobs of people in the census tract

This data does not exist

# load list of variables that I could access
variables <- load_variables(2022, "acs1", cache = TRUE)

#B24114 is for Detailed Occupation for the Civilian Employed Population 16 Years and Over
#B24115 is for Detailed Occupation for the Civilian Employed Population 16 Years and Over - Male
#B24116 is for Detailed Occupation for the Civilian Employed Population 16 Years and Over - Female
B24114 <- variables %>% 
  filter(str_detect(name, "B24114"))

#B24114$name

la_tract_occupation <- get_acs(
  state = "CA",
  county = "Los Angeles",
  geography = "state",
  variables = "B24114_100",
  geometry = TRUE,
  year = 2021
)

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%
v <- data(acs5_geography)

Try this that looks at the number of workers in household based on size

# B08202 Household Size by Number of Workers in Household

B08202 <- variables %>% 
  filter(str_detect(name, "B08202"))

la_tract_workers <- get_acs(
  state = "CA",
  county = "Los Angeles",
  geography = "state",
  variables = B08202$name,
  geometry = TRUE,
  year = 2022
)

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%

Look at the average age / demographic data by tract

https://walker-data.com/tidycensus/articles/basic-usage.html

ah_tracts <- unique(ah_clean$NAME)
# B01001A is Sex by Age (White Alone)
# B01001B is Sex by Age (Black or African American Alone)
# B01001

 B01001 <- variables %>% 
  filter(str_detect(name, "B01001"))

# has the de
la_tract_age <- get_acs(
  state = "CA",
  county = "Los Angeles",
  geography = "tract",
  variables = B01001$name,
  geometry = TRUE,
  year = 2022
)

tract_ethnicity <- la_tract_age %>%
  mutate(NAME = gsub("; Los Angeles County; California", # elements that you want to remove
                                  "", # replace with blank
                                  NAME)) %>%
  mutate(NAME = gsub("Census Tract ", # elements that you want to remove
                                  "", # replace with blank
                                  NAME)) %>% 
  rename(census_tract = NAME)

#unique(tract_ethnicity$census_tract)

# - make a list of the census tracts of interest 
on_plot <- lowburden_ah %>% 
  mutate(area = "lowburden_ah") %>% 
  rbind(burden_ah %>% mutate(area = "burden_ah")) %>% 
  rbind(burden_non %>%  mutate(area = "burden_non")) %>% 
  rbind(lowburden_non %>% mutate(area = "lowburden_non")) %>% 
  select(census_tract, area)

# - filter the tracts in the age / race df 
sex_age_ethnicity <- tract_ethnicity %>% 
  
  # - replace the variables with more intuitive information 
  mutate(variable = gsub("B01001A",
                       "white",
                       variable),
         variable = gsub("B01001B",
                       "black",
                       variable),
         variable = gsub("B01001C",
                       "ai", # american indian and alaska native
                       variable),
         variable = gsub("B01001D",
                       "asian",
                       variable),
         variable = gsub("B01001E",
                       "haw", # hawaiian and pacific islander
                       variable),
         variable = gsub("B01001F",
                       "other",
                       variable),
         variable = gsub("B01001G",
                       "two", # two or more 
                       variable),
                  variable = gsub("B01001H",
                       "wn", # white non hispanic and latino
                       variable),
         variable = gsub("B01001I",
                       "hisp", # hispanic and latino
                       variable),
         variable = gsub("B01001_",
                       "total_",
                       variable)) %>% 
  separate_wider_delim(cols = 3,
                       delim = "_",
                       names = c("group", "age_variable")) 


#plot(ethnicity$geometry)
#unique(all_eth$census_tract)  
#unique(ah_tracts_eth$census_tract)

ah_tracts_eth <- sex_age_ethnicity %>% 
  filter(census_tract %in% ah_tracts) %>% 
  filter(age_variable == "001") %>% 
  select(-c(GEOID, age_variable, moe, geometry)) %>% 
  mutate(status = "affordable")
  # pivot_wider(names_from = group,
  #             values_from = estimate)

ah_tot <- ah_tracts_eth %>% 
  filter(group == "total") %>% 
  select(census_tract, group, estimate) %>% 
  rename(totals = group,
         total_est = estimate)

ah_tracts_eth <- ah_tracts_eth %>% 
  filter(group != "total") %>% 
  left_join(ah_tot, by = "census_tract") %>% 
  mutate(percent = round((estimate/total_est)*100, 1)) %>% 
  select(census_tract, group, estimate, percent, status) 

all_eth <- sex_age_ethnicity %>% 
  filter(census_tract %in% rent_burdened$census_tract) %>% 
  filter(age_variable == "001") %>% 
  select(-c(GEOID, age_variable, moe, geometry)) %>% 
  mutate(status = "non")

all_tot <- all_eth %>% 
  filter(group == "total") %>% 
  select(census_tract, group, estimate) %>% 
  rename(totals = group,
         total_est = estimate)

all_eth <- all_eth %>% 
  filter(group != "total") %>% 
  left_join(all_tot, by = "census_tract") %>% 
  mutate(percent = round((estimate/total_est)*100, 1)) %>% 
  select(census_tract, group, estimate, percent, status) 

# combine the tracts
compare <- all_eth %>% 
  rbind(ah_tracts_eth) %>% 
  rename(groups = group) %>% 
  filter(!groups %in% c("ai", "haw", "two"))
showtext_auto()
# Custom labels for x-axis
custom_labels <- c("White (non Hispanic or Latino)", "White", "Other", "Hispanic", "Black", "Asian")

compare_plot <- ggplot(compare) +
  geom_boxplot(aes(x = groups, y = percent, fill = status), 
               color = "white") +
  scale_fill_manual(values = c("#67000D", "#c19089")) +
  coord_flip()

compare_plot +  
  labs(y = "Percent of Census Tract", 
       x = "",
       fill = "",
       subtitle = "The demographics of the census tracts in all of the <span style = 'color: #c19089; font-size: 20pt;'>**Los Angeles Metropolitan Area**</span> vs tracts that <span style = 'color: #67000D; font-size: 20pt;'>**have built affordable housing**</span> since 2003.") +
  scale_x_discrete(labels = rev(custom_labels)) +
  theme_minimal()

# subtitle = "The demographics of the census tracts in all of the <span style = 'color: #c19089; font-size: 20pt;'>**Los Angeles Metropolitan Area**</span> vs tracts that <span style = 'color: #67000D; font-size: 20pt;'>**have built affordable housing**</span> since 2003.")
# for clarity: the estimate:total is the sum of the total male and the total female: could be used for total number of that ethnicity
ethnicity <- sex_age_ethnicity %>% 
  filter(census_tract %in% on_plot$census_tract) %>% 
  filter(age_variable == "001") %>% 
  select(-c(GEOID, age_variable, moe))

sum_on_plot <- on_plot %>% 
  left_join(ethnicity, by = "census_tract") %>% 
  select(-census_tract) %>%
  st_drop_geometry() %>% 
  group_by(area, group) %>% 
  summarize(estimate = sum(estimate))
  
ggplot() +
  geom_col(data = sum_on_plot, aes(x = area, y = estimate, fill = group),
           position = "dodge")

Making shapefiles for the census tracts

# high burden, no affordable housing
# burden_non_shp
burden_non %>%
  st_drop_geometry() %>% 
  head(1) %>% 
  select(extreme, very_low, low, not_eligible, total_households, # sum of the other columns
         percent_eligible
         ) %>%
  mutate(p_ex = round((extreme/total_households)*100, 0),
         p_vl = round((very_low/total_households)*100, 0),
         p_l = round((low/total_households)*100, 0),
         p_non = round((not_eligible/total_households)*100, 0)) %>% 
  select(p_ex, p_vl, p_l, p_non) %>%
  pivot_longer(cols = 1:4,
               names_to = "status",
               values_to = "percent") %>%
  mutate(name = "low_burden") %>% 
  ggplot() +
  geom_bar(aes(x = 1, y = percent, fill = status), 
           stat = "identity", position = "stack") +
  scale_fill_viridis(discrete = TRUE) +
  theme_minimal() +
  labs(x = NULL, y = "Percentage", title = "Not Burdened")

# high burden, affordable housing
#buden_ah_shp <- 
burden_ah %>% 
  st_drop_geometry() %>% 
  head(1) %>% 
  select(extreme, very_low, low, not_eligible, total_households, # sum of the other columns
         percent_eligible
         ) %>%
  mutate(p_ex = round((extreme/total_households)*100, 0),
         p_vl = round((very_low/total_households)*100, 0),
         p_l = round((low/total_households)*100, 0),
         p_non = round((not_eligible/total_households)*100, 0)) %>% 
  select(p_ex, p_vl, p_l, p_non) %>%
  pivot_longer(cols = 1:4,
               names_to = "status",
               values_to = "percent") %>%
  mutate(name = "low_burden") %>% 
  ggplot() +
  geom_bar(aes(x = 1, y = percent, fill = status), 
           stat = "identity", position = "stack") +
  scale_fill_viridis(discrete = TRUE) +
  theme_minimal() +
  labs(x = NULL, y = "Percentage", title = "Burden with AH")

# low burden, with affordable housing - and also have high percentage eligible for affordable housing
#low_burden_shp <- 
library(viridis)
lowburden_ah %>% 
  st_drop_geometry() %>% 
  head(1) %>% 
  select(extreme, very_low, low, not_eligible, total_households, # sum of the other columns
         percent_eligible
         ) %>%
  mutate(p_ex = round((extreme/total_households)*100, 0),
         p_vl = round((very_low/total_households)*100, 0),
         p_l = round((low/total_households)*100, 0),
         p_non = round((not_eligible/total_households)*100, 0)) %>%
  select(p_ex, p_vl, p_l, p_non) %>% 
  pivot_longer(cols = 1:4,
               names_to = "status",
               values_to = "percent") %>%
  mutate(name = "low_burden",
         ypos = c(50, 95, 75, 17), # location of labels on the plot
         status = as.factor(status)) %>% 
  ggplot() +
  geom_bar(aes(x = 1, y = percent, fill = reorder(status, percent)), 
           stat = "identity", position = "stack") +
  # Add text
  geom_text(
    aes(1, y = ypos, label = reorder(status, percent)),
    hjust = 0,
    nudge_x = 0,
    colour = "white",
    size = 7,
    family = "alice"
  ) +
  scale_fill_viridis(discrete = TRUE) +
  labs(x = NULL, y = "Percentage", title = "Low Burden with AH") +
  plot_theme
# Load necessary libraries
library(ggplot2)
library(XML)
library(svglite)

# Example data
data <- data.frame(
  category = c("A", "B", "C"),
  value1 = c(10, 20, 30),
  value2 = c(15, 25, 35)
)

# Create SVG plot
svg("file:///Users/lunacatalan/Downloads/8z5bfP01.svg", width = 500, height = 500)

# Read SVG image outline
svg_image <- xmlParse("file:///Users/lunacatalan/Downloads/8z5bfP01.svg")

# Extract the image dimensions
svg_dims <- xmlAttrs(xmlRoot(svg_image))

# Create the data visualization on top of the SVG image
plot_gg<- ggplot(data, aes(x = category)) +
  geom_bar(aes(y = value1, fill = "Value1"), stat = "identity") +
  geom_bar(aes(y = value2, fill = "Value2"), stat = "identity") +
  scale_fill_manual(values = c("Value1" = "blue", "Value2" = "red")) +
  theme_minimal()

# Retrieve the plot
plot <- ggplot_gtable(ggplot_build(plot_gg))

# Embed the SVG image outline as the background
print(xmlSVG(svg_image), vp = viewport(width = as.numeric(svg_dims["width"]),
                                       height = as.numeric(svg_dims["height"]),
                                       xmax = 1, ymax = 1, 
                                       just = c("left", "bottom")))

# Print the data visualization on top of the SVG image
grid.draw(plot)

# End SVG plot
dev.off()